{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-11-16T14:29:50.731803Z",
     "start_time": "2019-11-16T14:29:50.715887Z"
    }
   },
   "source": [
    "# Lists of S&P 500 companies historical components\n",
    "\n",
    "1. Read 'S&P 500 Historical Components & Changes.csv' which contains historical S&P 500 index membership since 1996.  Each row contains 2 fields: date and tickers; tickers is a comma delimited string of the symbols for that specified date.  This file was provided as open source via download for the book \"Trading Evolved\" by Andreas F. Clenow.  \n",
    "2. Clean up the list by fixing up symbols, removing duplicates in each row, and sorting the symbols  \n",
    "3. Read in 'sp500_changes_since_2019.csv' which contains the changes to the index since 2019  \n",
    "4. Create new rows in the historical membership list for these changes  \n",
    "5. Compare the last row to current list of S&P 500 components on the wikipedia  \n",
    "6. A list of differences is generated.  If this list is NOT empty, then changes have been made to the index that should be included in 'sp500_changes_since_2019.csv'.  \n",
    "7. Write out date stamped 'S&P 500 Historical Components & Changes' csv file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-21T03:37:48.983781Z",
     "start_time": "2020-04-21T03:37:47.965842Z"
    }
   },
   "outputs": [],
   "source": [
    "from datetime import datetime\n",
    "import os\n",
    "import pandas as pd\n",
    "\n",
    "pd.options.mode.chained_assignment = None  # default='warn'\n",
    "pd.set_option('display.max_rows', 600)\n",
    "\n",
    "# -*- encoding: utf-8 -*-\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-21T03:37:49.012882Z",
     "start_time": "2020-04-21T03:37:48.991884Z"
    }
   },
   "outputs": [
    {
     "data": {
      "application/javascript": [
       "IPython.OutputArea.prototype._should_scroll = function(lines) {\n",
       "    return false;\n",
       "}\n"
      ],
      "text/plain": [
       "<IPython.core.display.Javascript object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%%javascript\n",
    "IPython.OutputArea.prototype._should_scroll = function(lines) {\n",
    "    return false;\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-21T03:37:49.036298Z",
     "start_time": "2020-04-21T03:37:49.020815Z"
    }
   },
   "outputs": [],
   "source": [
    "def get_table(filename):\n",
    "\n",
    "    if os.path.isfile(filename):\n",
    "        df = pd.read_csv(filename, index_col='date')\n",
    "        return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-21T03:37:49.348506Z",
     "start_time": "2020-04-21T03:37:49.043763Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tickers</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1996-01-02</th>\n",
       "      <td>TMC-200006,AAL-199702,AAMRQ-201312,AAPL,ABI-20...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1996-01-03</th>\n",
       "      <td>AAL-199702,AAMRQ-201312,AAPL,ABI-200811,ABS-20...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1996-01-04</th>\n",
       "      <td>TMC-200006,AAL-199702,AAMRQ-201312,AAPL,OAT-20...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1996-01-10</th>\n",
       "      <td>TMC-200006,AAL-199702,AAMRQ-201312,AAPL,ABI-20...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1996-01-11</th>\n",
       "      <td>FLMIQ-200408,FLTWQ-200907,FMC,FMCC,FNMA,FTL.A-...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                      tickers\n",
       "date                                                         \n",
       "1996-01-02  TMC-200006,AAL-199702,AAMRQ-201312,AAPL,ABI-20...\n",
       "1996-01-03  AAL-199702,AAMRQ-201312,AAPL,ABI-200811,ABS-20...\n",
       "1996-01-04  TMC-200006,AAL-199702,AAMRQ-201312,AAPL,OAT-20...\n",
       "1996-01-10  TMC-200006,AAL-199702,AAMRQ-201312,AAPL,ABI-20...\n",
       "1996-01-11  FLMIQ-200408,FLTWQ-200907,FMC,FMCC,FNMA,FTL.A-..."
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filename = 'S&P 500 Historical Components & Changes.csv'\n",
    "df = get_table(filename)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-21T03:37:49.732737Z",
     "start_time": "2020-04-21T03:37:49.356673Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tickers</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2019-01-03</th>\n",
       "      <td>[A, AAL, AAP, AAPL, ABBV, ABC, ABMD, ABT, ACN,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-01-08</th>\n",
       "      <td>[A, AAL, AAP, AAPL, ABBV, ABC, ABMD, ABT, ACN,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-01-09</th>\n",
       "      <td>[A, AAL, AAP, AAPL, ABBV, ABC, ABMD, ABT, ACN,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-01-10</th>\n",
       "      <td>[A, AAL, AAP, AAPL, ABBV, ABC, ABMD, ABT, ACN,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-01-11</th>\n",
       "      <td>[A, AAL, AAP, AAPL, ABBV, ABC, ABMD, ABT, ACN,...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                      tickers\n",
       "date                                                         \n",
       "2019-01-03  [A, AAL, AAP, AAPL, ABBV, ABC, ABMD, ABT, ACN,...\n",
       "2019-01-08  [A, AAL, AAP, AAPL, ABBV, ABC, ABMD, ABT, ACN,...\n",
       "2019-01-09  [A, AAL, AAP, AAPL, ABBV, ABC, ABMD, ABT, ACN,...\n",
       "2019-01-10  [A, AAL, AAP, AAPL, ABBV, ABC, ABMD, ABT, ACN,...\n",
       "2019-01-11  [A, AAL, AAP, AAPL, ABBV, ABC, ABMD, ABT, ACN,..."
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Convert ticker column from csv to list, then sort.\n",
    "df['tickers'] = df['tickers'].apply(lambda x: sorted(x.split(',')))\n",
    "df.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "l = list(df['tickers'].head(100))[0]\n",
    "# l"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-21T03:37:50.681807Z",
     "start_time": "2020-04-21T03:37:49.742193Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tickers</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1996-01-02</th>\n",
       "      <td>[AAL, AAMRQ, AAPL, ABI, ABS, ABT, ABX, ACKH, A...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1996-01-03</th>\n",
       "      <td>[AAL, AAMRQ, AAPL, ABI, ABS, ABT, ABX, ACKH, A...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1996-01-04</th>\n",
       "      <td>[AAL, AAMRQ, AAPL, ABI, ABS, ABT, ABX, ACKH, A...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1996-01-10</th>\n",
       "      <td>[AAL, AAMRQ, AAPL, ABI, ABS, ABT, ABX, ACKH, A...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1996-01-11</th>\n",
       "      <td>[AAL, AAMRQ, AAPL, ABI, ABS, ABT, ABX, ACKH, A...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                      tickers\n",
       "date                                                         \n",
       "1996-01-02  [AAL, AAMRQ, AAPL, ABI, ABS, ABT, ABX, ACKH, A...\n",
       "1996-01-03  [AAL, AAMRQ, AAPL, ABI, ABS, ABT, ABX, ACKH, A...\n",
       "1996-01-04  [AAL, AAMRQ, AAPL, ABI, ABS, ABT, ABX, ACKH, A...\n",
       "1996-01-10  [AAL, AAMRQ, AAPL, ABI, ABS, ABT, ABX, ACKH, A...\n",
       "1996-01-11  [AAL, AAMRQ, AAPL, ABI, ABS, ABT, ABX, ACKH, A..."
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Replace SYMBOL-yyyymm with SYMBOL.\n",
    "df['tickers'] = [[ticker.split('-')[0] for ticker in tickers] for tickers in df['tickers']]\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "504"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "l = list(df['tickers'].tail(1))[0]\n",
    "len(l)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Add LIN after 2018-10-31\n",
    "df.loc[df.index > '2018-10-31', 'tickers'].apply(lambda x: x.append('LIN'))\n",
    "'LIN' in df['tickers'].values[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-21T03:37:51.690181Z",
     "start_time": "2020-04-21T03:37:50.687145Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tickers</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2019-01-03</th>\n",
       "      <td>[A, AAL, AAP, AAPL, ABBV, ABC, ABMD, ABT, ACN,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-01-08</th>\n",
       "      <td>[A, AAL, AAP, AAPL, ABBV, ABC, ABMD, ABT, ACN,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-01-09</th>\n",
       "      <td>[A, AAL, AAP, AAPL, ABBV, ABC, ABMD, ABT, ACN,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-01-10</th>\n",
       "      <td>[A, AAL, AAP, AAPL, ABBV, ABC, ABMD, ABT, ACN,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-01-11</th>\n",
       "      <td>[A, AAL, AAP, AAPL, ABBV, ABC, ABMD, ABT, ACN,...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                      tickers\n",
       "date                                                         \n",
       "2019-01-03  [A, AAL, AAP, AAPL, ABBV, ABC, ABMD, ABT, ACN,...\n",
       "2019-01-08  [A, AAL, AAP, AAPL, ABBV, ABC, ABMD, ABT, ACN,...\n",
       "2019-01-09  [A, AAL, AAP, AAPL, ABBV, ABC, ABMD, ABT, ACN,...\n",
       "2019-01-10  [A, AAL, AAP, AAPL, ABBV, ABC, ABMD, ABT, ACN,...\n",
       "2019-01-11  [A, AAL, AAP, AAPL, ABBV, ABC, ABMD, ABT, ACN,..."
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Remove duplicates in each row.\n",
    "df['tickers'] = [sorted(list(set(tickers))) for tickers in df['tickers']]\n",
    "df.tail()\n",
    "# num = []\n",
    "# for i, row in df.iterrows():\n",
    "#     l = list(row['tickers'])\n",
    "#     num.append(len(l))\n",
    "#     print(i, len(l))\n",
    "# pd.Series(num).describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "498"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "l = list(df['tickers'].head(3100))[1500]\n",
    "len(l)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Changes to the list of S&P 500 components\n",
    "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies#Selected_changes_to_the_list_of_S&P_500_components"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-21T03:37:51.780372Z",
     "start_time": "2020-04-21T03:37:51.701077Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>add</th>\n",
       "      <th>remove</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2019-01-18</th>\n",
       "      <td>[TFX]</td>\n",
       "      <td>[PCG]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-02-15</th>\n",
       "      <td>[ATO]</td>\n",
       "      <td>[NFX]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-02-27</th>\n",
       "      <td>[WAB]</td>\n",
       "      <td>[GT]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-04-02</th>\n",
       "      <td>[DOW]</td>\n",
       "      <td>[BHF]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-06-01</th>\n",
       "      <td>[LHX]</td>\n",
       "      <td>[HRS]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-06-03</th>\n",
       "      <td>[CTVA, DD]</td>\n",
       "      <td>[DWDP, FLR]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-06-07</th>\n",
       "      <td>[AMCR]</td>\n",
       "      <td>[MAT]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-07-01</th>\n",
       "      <td>[MKTX]</td>\n",
       "      <td>[LLL]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-07-15</th>\n",
       "      <td>[TMUS]</td>\n",
       "      <td>[RHT]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-08-08</th>\n",
       "      <td>[GL]</td>\n",
       "      <td>[TMK]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-08-09</th>\n",
       "      <td>[IEX, LDOS]</td>\n",
       "      <td>[APC, FL]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-09-23</th>\n",
       "      <td>[CDW]</td>\n",
       "      <td>[TSS]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-09-26</th>\n",
       "      <td>[NVR]</td>\n",
       "      <td>[JEF]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-10-03</th>\n",
       "      <td>[LVS]</td>\n",
       "      <td>[NKTR]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-10-18</th>\n",
       "      <td>[BKR]</td>\n",
       "      <td>[BHGE]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-11-05</th>\n",
       "      <td>[NLOK, PEAK]</td>\n",
       "      <td>[HCP, SYMC]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-11-21</th>\n",
       "      <td>[NOW]</td>\n",
       "      <td>[CELG]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-12-05</th>\n",
       "      <td>[VIAC, WRB]</td>\n",
       "      <td>[CBS, VIAB]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-12-09</th>\n",
       "      <td>[ODFL, TFC]</td>\n",
       "      <td>[BBT, STI]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-12-10</th>\n",
       "      <td>[J]</td>\n",
       "      <td>[JEC]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-12-23</th>\n",
       "      <td>[LYV, STE, ZBRA]</td>\n",
       "      <td>[AMG, MAC, TRIP]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2020-01-28</th>\n",
       "      <td>[PAYC]</td>\n",
       "      <td>[WCG]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2020-03-03</th>\n",
       "      <td>[TT]</td>\n",
       "      <td>[XEC]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2020-04-03</th>\n",
       "      <td>[CARR, OTIS, RTX]</td>\n",
       "      <td>[UTX]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2020-04-06</th>\n",
       "      <td>[HWM]</td>\n",
       "      <td>[ARNC, M, RTN]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2020-05-12</th>\n",
       "      <td>[DPZ, DXCM]</td>\n",
       "      <td>[AGN, CPRI]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2020-05-22</th>\n",
       "      <td>[WST]</td>\n",
       "      <td>[HP]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2020-06-22</th>\n",
       "      <td>[BIO, TDY, TYL]</td>\n",
       "      <td>[ADS, HOG, JWN]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2020-09-18</th>\n",
       "      <td>[LUMN]</td>\n",
       "      <td>[CTL]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2020-09-21</th>\n",
       "      <td>[CTLT, ETSY, TER]</td>\n",
       "      <td>[COTY, HRB, KSS]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2020-10-07</th>\n",
       "      <td>[POOL]</td>\n",
       "      <td>[ETFC]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2020-10-12</th>\n",
       "      <td>[VNT]</td>\n",
       "      <td>[NBL]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2020-11-17</th>\n",
       "      <td>[VTRS]</td>\n",
       "      <td>[MYL]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2020-12-21</th>\n",
       "      <td>[TSLA]</td>\n",
       "      <td>[AIV]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-07</th>\n",
       "      <td>[ENPH]</td>\n",
       "      <td>[TIF]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-21</th>\n",
       "      <td>[TRMB]</td>\n",
       "      <td>[CXO]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-02-12</th>\n",
       "      <td>[MPWR]</td>\n",
       "      <td>[FTI]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-03-22</th>\n",
       "      <td>[CZR, GNRC, NXPI, PENN]</td>\n",
       "      <td>[FLS, SLG, VNT, XRX]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-04-20</th>\n",
       "      <td>[PTC]</td>\n",
       "      <td>[VAR]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-05-14</th>\n",
       "      <td>[CRL]</td>\n",
       "      <td>[FLIR]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-06-04</th>\n",
       "      <td>[OGN]</td>\n",
       "      <td>[HFC]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-07-21</th>\n",
       "      <td>[MRNA]</td>\n",
       "      <td>[ALXN]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-08-03</th>\n",
       "      <td>[BBWI]</td>\n",
       "      <td>[LB]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-08-30</th>\n",
       "      <td>[TECH]</td>\n",
       "      <td>[MXIM]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-09-20</th>\n",
       "      <td>[BRO, CDAY, MTCH]</td>\n",
       "      <td>[NOV, PRGO, UNM]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-10-04</th>\n",
       "      <td>[CTRA]</td>\n",
       "      <td>[COG]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-12-14</th>\n",
       "      <td>[EPAM]</td>\n",
       "      <td>[KSU]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-12-20</th>\n",
       "      <td>[FDS, SBNY, SEDG]</td>\n",
       "      <td>[HBI, LEG, WU]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2022-01-20</th>\n",
       "      <td>[WTW]</td>\n",
       "      <td>[WLTW]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2022-02-02</th>\n",
       "      <td>[CEG]</td>\n",
       "      <td>[GPS]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2022-02-15</th>\n",
       "      <td>[NDSN]</td>\n",
       "      <td>[XLNX]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2022-02-17</th>\n",
       "      <td>[PARA]</td>\n",
       "      <td>[VIAC]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2022-03-02</th>\n",
       "      <td>[MOH]</td>\n",
       "      <td>[INFO]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2022-04-04</th>\n",
       "      <td>[CPT]</td>\n",
       "      <td>[PBCT]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2022-04-11</th>\n",
       "      <td>[WBD]</td>\n",
       "      <td>[DISCA, DISCK]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2022-05-10</th>\n",
       "      <td>[BALL]</td>\n",
       "      <td>[BLL]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2022-06-08</th>\n",
       "      <td>[VICI]</td>\n",
       "      <td>[CERN]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2022-06-09</th>\n",
       "      <td>[META]</td>\n",
       "      <td>[FB]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2022-06-21</th>\n",
       "      <td>[KDP, ON]</td>\n",
       "      <td>[IPGP, UA, UAA]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2022-06-28</th>\n",
       "      <td>[ELV]</td>\n",
       "      <td>[ANTM]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2022-09-19</th>\n",
       "      <td>[CSGP, INVH]</td>\n",
       "      <td>[PENN, PVH]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2022-10-03</th>\n",
       "      <td>[EQT, PCG]</td>\n",
       "      <td>[CTXS, DRE]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2022-10-12</th>\n",
       "      <td>[TRGP]</td>\n",
       "      <td>[NLSN]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2022-11-01</th>\n",
       "      <td>[ACGL]</td>\n",
       "      <td>[TWTR]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2022-11-08</th>\n",
       "      <td>[GEN]</td>\n",
       "      <td>[NLOK]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2022-12-19</th>\n",
       "      <td>[FSLR]</td>\n",
       "      <td>[FBHS]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2022-12-22</th>\n",
       "      <td>[STLD]</td>\n",
       "      <td>[ABMD]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2023-01-04</th>\n",
       "      <td>[GEHC]</td>\n",
       "      <td>[VNO]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2023-03-15</th>\n",
       "      <td>[BG, PODD]</td>\n",
       "      <td>[SBNY, SIVB]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2023-03-20</th>\n",
       "      <td>[FICO]</td>\n",
       "      <td>[LUMN]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2023-05-04</th>\n",
       "      <td>[AXON]</td>\n",
       "      <td>[FRC]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2023-05-16</th>\n",
       "      <td>[RVTY]</td>\n",
       "      <td>[PKI]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2023-06-07</th>\n",
       "      <td>[FI]</td>\n",
       "      <td>[FISV]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2023-06-10</th>\n",
       "      <td>[EG]</td>\n",
       "      <td>[RE]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2023-06-20</th>\n",
       "      <td>[PANW]</td>\n",
       "      <td>[DISH]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2023-08-25</th>\n",
       "      <td>[KVUE]</td>\n",
       "      <td>[AAP]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2023-08-30</th>\n",
       "      <td>[COR]</td>\n",
       "      <td>[ABC]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2023-09-18</th>\n",
       "      <td>[ABNB, BX]</td>\n",
       "      <td>[LNC, NWL]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2023-10-02</th>\n",
       "      <td>[VLTO]</td>\n",
       "      <td>[DXC]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2023-10-18</th>\n",
       "      <td>[BLDR, HUBB, JBL, LULU, UBER]</td>\n",
       "      <td>[ALK, ATVI, OGN, SEDG, SEE]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2024-02-01</th>\n",
       "      <td>[DAY]</td>\n",
       "      <td>[CDAY]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2024-03-04</th>\n",
       "      <td>[DOC]</td>\n",
       "      <td>[PEAK]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2024-03-18</th>\n",
       "      <td>[DECK, SMCI]</td>\n",
       "      <td>[WHR, ZION]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2024-03-25</th>\n",
       "      <td>[CPAY]</td>\n",
       "      <td>[FLT]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2024-04-03</th>\n",
       "      <td>[GEV, SOLV]</td>\n",
       "      <td>[VFC, XRAY]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2024-05-08</th>\n",
       "      <td>[VST]</td>\n",
       "      <td>[PXD]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2024-06-24</th>\n",
       "      <td>[CRWD, GDDY, KKR]</td>\n",
       "      <td>[CMA, ILMN, RHI]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2024-07-08</th>\n",
       "      <td>[SW]</td>\n",
       "      <td>[WRK]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2024-09-23</th>\n",
       "      <td>[DELL, ERIE, PLTR]</td>\n",
       "      <td>[AAL, BIO, ETSY]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2024-09-30</th>\n",
       "      <td>[AMTM]</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2024-10-01</th>\n",
       "      <td>[]</td>\n",
       "      <td>[BBWI]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2024-11-26</th>\n",
       "      <td>[TPL]</td>\n",
       "      <td>[MRO]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2024-12-23</th>\n",
       "      <td>[APO, LII, WDAY]</td>\n",
       "      <td>[AMTM, CTLT, QRVO]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2025-04-24</th>\n",
       "      <td>[DASH, EXE, TKO, WSM]</td>\n",
       "      <td>[BWA, CE, FMC, TFX]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2025-05-19</th>\n",
       "      <td>[COIN]</td>\n",
       "      <td>[DFS]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2025-07-09</th>\n",
       "      <td>[DDOG]</td>\n",
       "      <td>[JNPR]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                      add                       remove\n",
       "date                                                                  \n",
       "2019-01-18                          [TFX]                        [PCG]\n",
       "2019-02-15                          [ATO]                        [NFX]\n",
       "2019-02-27                          [WAB]                         [GT]\n",
       "2019-04-02                          [DOW]                        [BHF]\n",
       "2019-06-01                          [LHX]                        [HRS]\n",
       "2019-06-03                     [CTVA, DD]                  [DWDP, FLR]\n",
       "2019-06-07                         [AMCR]                        [MAT]\n",
       "2019-07-01                         [MKTX]                        [LLL]\n",
       "2019-07-15                         [TMUS]                        [RHT]\n",
       "2019-08-08                           [GL]                        [TMK]\n",
       "2019-08-09                    [IEX, LDOS]                    [APC, FL]\n",
       "2019-09-23                          [CDW]                        [TSS]\n",
       "2019-09-26                          [NVR]                        [JEF]\n",
       "2019-10-03                          [LVS]                       [NKTR]\n",
       "2019-10-18                          [BKR]                       [BHGE]\n",
       "2019-11-05                   [NLOK, PEAK]                  [HCP, SYMC]\n",
       "2019-11-21                          [NOW]                       [CELG]\n",
       "2019-12-05                    [VIAC, WRB]                  [CBS, VIAB]\n",
       "2019-12-09                    [ODFL, TFC]                   [BBT, STI]\n",
       "2019-12-10                            [J]                        [JEC]\n",
       "2019-12-23               [LYV, STE, ZBRA]             [AMG, MAC, TRIP]\n",
       "2020-01-28                         [PAYC]                        [WCG]\n",
       "2020-03-03                           [TT]                        [XEC]\n",
       "2020-04-03              [CARR, OTIS, RTX]                        [UTX]\n",
       "2020-04-06                          [HWM]               [ARNC, M, RTN]\n",
       "2020-05-12                    [DPZ, DXCM]                  [AGN, CPRI]\n",
       "2020-05-22                          [WST]                         [HP]\n",
       "2020-06-22                [BIO, TDY, TYL]              [ADS, HOG, JWN]\n",
       "2020-09-18                         [LUMN]                        [CTL]\n",
       "2020-09-21              [CTLT, ETSY, TER]             [COTY, HRB, KSS]\n",
       "2020-10-07                         [POOL]                       [ETFC]\n",
       "2020-10-12                          [VNT]                        [NBL]\n",
       "2020-11-17                         [VTRS]                        [MYL]\n",
       "2020-12-21                         [TSLA]                        [AIV]\n",
       "2021-01-07                         [ENPH]                        [TIF]\n",
       "2021-01-21                         [TRMB]                        [CXO]\n",
       "2021-02-12                         [MPWR]                        [FTI]\n",
       "2021-03-22        [CZR, GNRC, NXPI, PENN]         [FLS, SLG, VNT, XRX]\n",
       "2021-04-20                          [PTC]                        [VAR]\n",
       "2021-05-14                          [CRL]                       [FLIR]\n",
       "2021-06-04                          [OGN]                        [HFC]\n",
       "2021-07-21                         [MRNA]                       [ALXN]\n",
       "2021-08-03                         [BBWI]                         [LB]\n",
       "2021-08-30                         [TECH]                       [MXIM]\n",
       "2021-09-20              [BRO, CDAY, MTCH]             [NOV, PRGO, UNM]\n",
       "2021-10-04                         [CTRA]                        [COG]\n",
       "2021-12-14                         [EPAM]                        [KSU]\n",
       "2021-12-20              [FDS, SBNY, SEDG]               [HBI, LEG, WU]\n",
       "2022-01-20                          [WTW]                       [WLTW]\n",
       "2022-02-02                          [CEG]                        [GPS]\n",
       "2022-02-15                         [NDSN]                       [XLNX]\n",
       "2022-02-17                         [PARA]                       [VIAC]\n",
       "2022-03-02                          [MOH]                       [INFO]\n",
       "2022-04-04                          [CPT]                       [PBCT]\n",
       "2022-04-11                          [WBD]               [DISCA, DISCK]\n",
       "2022-05-10                         [BALL]                        [BLL]\n",
       "2022-06-08                         [VICI]                       [CERN]\n",
       "2022-06-09                         [META]                         [FB]\n",
       "2022-06-21                      [KDP, ON]              [IPGP, UA, UAA]\n",
       "2022-06-28                          [ELV]                       [ANTM]\n",
       "2022-09-19                   [CSGP, INVH]                  [PENN, PVH]\n",
       "2022-10-03                     [EQT, PCG]                  [CTXS, DRE]\n",
       "2022-10-12                         [TRGP]                       [NLSN]\n",
       "2022-11-01                         [ACGL]                       [TWTR]\n",
       "2022-11-08                          [GEN]                       [NLOK]\n",
       "2022-12-19                         [FSLR]                       [FBHS]\n",
       "2022-12-22                         [STLD]                       [ABMD]\n",
       "2023-01-04                         [GEHC]                        [VNO]\n",
       "2023-03-15                     [BG, PODD]                 [SBNY, SIVB]\n",
       "2023-03-20                         [FICO]                       [LUMN]\n",
       "2023-05-04                         [AXON]                        [FRC]\n",
       "2023-05-16                         [RVTY]                        [PKI]\n",
       "2023-06-07                           [FI]                       [FISV]\n",
       "2023-06-10                           [EG]                         [RE]\n",
       "2023-06-20                         [PANW]                       [DISH]\n",
       "2023-08-25                         [KVUE]                        [AAP]\n",
       "2023-08-30                          [COR]                        [ABC]\n",
       "2023-09-18                     [ABNB, BX]                   [LNC, NWL]\n",
       "2023-10-02                         [VLTO]                        [DXC]\n",
       "2023-10-18  [BLDR, HUBB, JBL, LULU, UBER]  [ALK, ATVI, OGN, SEDG, SEE]\n",
       "2024-02-01                          [DAY]                       [CDAY]\n",
       "2024-03-04                          [DOC]                       [PEAK]\n",
       "2024-03-18                   [DECK, SMCI]                  [WHR, ZION]\n",
       "2024-03-25                         [CPAY]                        [FLT]\n",
       "2024-04-03                    [GEV, SOLV]                  [VFC, XRAY]\n",
       "2024-05-08                          [VST]                        [PXD]\n",
       "2024-06-24              [CRWD, GDDY, KKR]             [CMA, ILMN, RHI]\n",
       "2024-07-08                           [SW]                        [WRK]\n",
       "2024-09-23             [DELL, ERIE, PLTR]             [AAL, BIO, ETSY]\n",
       "2024-09-30                         [AMTM]                           []\n",
       "2024-10-01                             []                       [BBWI]\n",
       "2024-11-26                          [TPL]                        [MRO]\n",
       "2024-12-23               [APO, LII, WDAY]           [AMTM, CTLT, QRVO]\n",
       "2025-04-24          [DASH, EXE, TKO, WSM]          [BWA, CE, FMC, TFX]\n",
       "2025-05-19                         [COIN]                        [DFS]\n",
       "2025-07-09                         [DDOG]                       [JNPR]"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read S&P 500 changes csv file.\n",
    "filename = 'sp500_changes_since_2019.csv'\n",
    "changes = get_table(filename)\n",
    "\n",
    "# Convert ticker column from csv to list, then sort.\n",
    "import pandas as pd\n",
    "\n",
    "changes['add'] = changes['add'].apply(lambda x: sorted(x.split(',')) if pd.notna(x) and x else [])\n",
    "changes['remove'] = changes['remove'].apply(lambda x: sorted(x.split(',')) if pd.notna(x) and x else [])\n",
    "changes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-21T03:37:52.045224Z",
     "start_time": "2020-04-21T03:37:51.785132Z"
    }
   },
   "outputs": [],
   "source": [
    "# Copy the last row in dataframe, modify for changes, then append.\n",
    "for change in changes.itertuples():\n",
    "\n",
    "    new_row = df.tail(1)\n",
    "    \n",
    "    tickers = list(new_row['tickers'].iloc[0])\n",
    "    tickers += change.add\n",
    "    tickers = list(set(tickers) - set(change.remove))\n",
    "    tickers = sorted(tickers)\n",
    "    \n",
    "    d = {'date':change.Index, 'tickers':[tickers]}\n",
    "    new_entry = pd.DataFrame(d)\n",
    "    new_entry.set_index('date', inplace=True)\n",
    "    df = pd.concat([df, new_entry])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-21T03:37:52.098965Z",
     "start_time": "2020-04-21T03:37:52.056979Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tickers</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2024-11-26</th>\n",
       "      <td>[A, AAPL, ABBV, ABNB, ABT, ACGL, ACN, ADBE, AD...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2024-12-23</th>\n",
       "      <td>[A, AAPL, ABBV, ABNB, ABT, ACGL, ACN, ADBE, AD...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2025-04-24</th>\n",
       "      <td>[A, AAPL, ABBV, ABNB, ABT, ACGL, ACN, ADBE, AD...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2025-05-19</th>\n",
       "      <td>[A, AAPL, ABBV, ABNB, ABT, ACGL, ACN, ADBE, AD...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2025-07-09</th>\n",
       "      <td>[A, AAPL, ABBV, ABNB, ABT, ACGL, ACN, ADBE, AD...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                      tickers\n",
       "date                                                         \n",
       "2024-11-26  [A, AAPL, ABBV, ABNB, ABT, ACGL, ACN, ADBE, AD...\n",
       "2024-12-23  [A, AAPL, ABBV, ABNB, ABT, ACGL, ACN, ADBE, AD...\n",
       "2025-04-24  [A, AAPL, ABBV, ABNB, ABT, ACGL, ACN, ADBE, AD...\n",
       "2025-05-19  [A, AAPL, ABBV, ABNB, ABT, ACGL, ACN, ADBE, AD...\n",
       "2025-07-09  [A, AAPL, ABBV, ABNB, ABT, ACGL, ACN, ADBE, AD..."
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-21T03:37:52.144923Z",
     "start_time": "2020-04-21T03:37:52.109637Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[]"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# compare last row to current S&P500 list\n",
    "filename = 'sp500.csv'\n",
    "current = pd.read_csv(filename)\n",
    "current = list(current['Symbol'])\n",
    "last_entry = list(df['tickers'].iloc[-1])\n",
    "\n",
    "diff = list(set(current) - set(last_entry)) + list(set(last_entry) - set(current))\n",
    "diff"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-21T03:37:52.235860Z",
     "start_time": "2020-04-21T03:37:52.153035Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tickers</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1996-01-02</th>\n",
       "      <td>AAL,AAMRQ,AAPL,ABI,ABS,ABT,ABX,ACKH,ACV,ADM,AD...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1996-01-03</th>\n",
       "      <td>AAL,AAMRQ,AAPL,ABI,ABS,ABT,ABX,ACKH,ACV,ADM,AD...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1996-01-04</th>\n",
       "      <td>AAL,AAMRQ,AAPL,ABI,ABS,ABT,ABX,ACKH,ACV,ADM,AD...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1996-01-10</th>\n",
       "      <td>AAL,AAMRQ,AAPL,ABI,ABS,ABT,ABX,ACKH,ACV,ADM,AD...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1996-01-11</th>\n",
       "      <td>AAL,AAMRQ,AAPL,ABI,ABS,ABT,ABX,ACKH,ACV,ADM,AD...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                      tickers\n",
       "date                                                         \n",
       "1996-01-02  AAL,AAMRQ,AAPL,ABI,ABS,ABT,ABX,ACKH,ACV,ADM,AD...\n",
       "1996-01-03  AAL,AAMRQ,AAPL,ABI,ABS,ABT,ABX,ACKH,ACV,ADM,AD...\n",
       "1996-01-04  AAL,AAMRQ,AAPL,ABI,ABS,ABT,ABX,ACKH,ACV,ADM,AD...\n",
       "1996-01-10  AAL,AAMRQ,AAPL,ABI,ABS,ABT,ABX,ACKH,ACV,ADM,AD...\n",
       "1996-01-11  AAL,AAMRQ,AAPL,ABI,ABS,ABT,ABX,ACKH,ACV,ADM,AD..."
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Convert tickers column back to csv.\n",
    "df['tickers'] = df['tickers'].apply(lambda x: \",\".join(x))\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-21T03:37:52.430487Z",
     "start_time": "2020-04-21T03:37:52.246982Z"
    }
   },
   "outputs": [],
   "source": [
    "now = datetime.now()\n",
    "dt_string = now.strftime('%m-%d-%Y') # mm-dd-YYYY\n",
    "filename = 'S&P 500 Historical Components & Changes({}).csv'.format(dt_string)\n",
    "df.to_csv(filename)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autoclose": false,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
